## Warning: package 'corrplot' was built under R version 3.5.3
## corrplot 0.84 loaded
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'Hmisc' was built under R version 3.5.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.3
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Warning: package 'leaps' was built under R version 3.5.3
## Warning: package 'lubridate' was built under R version 3.5.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Warning: package 'MASS' was built under R version 3.5.3
## Warning: package 'pROC' was built under R version 3.5.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Warning: package 'ROCR' was built under R version 3.5.3
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.3
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ----------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.2.5
## v tidyr 0.8.2 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.3.1
## v tibble 2.1.3 v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## -- Conflicts -------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks MASS::select()
## x lubridate::setdiff() masks base::setdiff()
## x dplyr::src() masks Hmisc::src()
## x dplyr::summarize() masks Hmisc::summarize()
## x lubridate::union() masks base::union()
< Les prédicteurs sont de type entier ou numérique. < Le dataframe ne contient pas de données manquantes. < En revanche, les variables “Minute” et “Hour” valent 0 pour toutes les observations. < Dans l’échantillon, les nombres de jours de pluie et de jours de beau temps sont à peu près équivalents.
## 'data.frame': 1244 obs. of 47 variables:
## $ X : int 1 3 5 7 9 11 13 15 17 19 ...
## $ Year : int 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
## $ Month : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Day : int 1 3 5 7 9 11 13 15 17 19 ...
## $ Hour : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Minute : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Temperature.daily.mean..2.m.above.gnd. : num 13.4 14.3 19.9 19.3 22.1 ...
## $ Relative.Humidity.daily.mean..2.m.above.gnd.: num 77.9 83.6 66.6 72 74.3 ...
## $ Mean.Sea.Level.Pressure.daily.mean..MSL. : num 1016 1016 1018 1014 1005 ...
## $ Total.Precipitation.daily.sum..sfc. : num 0.1 0 0 0.4 2 1.4 5.1 8.2 4.8 5.3 ...
## $ Snowfall.amount.raw.daily.sum..sfc. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Total.Cloud.Cover.daily.mean..sfc. : num 65 81 0 49.7 58.8 ...
## $ High.Cloud.Cover.daily.mean..high.cld.lay. : num 33.1 31.6 0 24.1 48.4 ...
## $ Medium.Cloud.Cover.daily.mean..mid.cld.lay. : num 36.9 10.8 0 24.3 54.7 ...
## $ Low.Cloud.Cover.daily.mean..low.cld.lay. : num 45.1 80.5 0 49.2 22.2 ...
## $ Sunshine.Duration.daily.sum..sfc. : num 350.8 68.7 891.7 666.7 400.7 ...
## $ Shortwave.Radiation.daily.sum..sfc. : num 5722 3551 8284 7456 6532 ...
## $ Wind.Speed.daily.mean..10.m.above.gnd. : num 8.97 8.61 4.47 10.05 10.73 ...
## $ Wind.Direction.daily.mean..10.m.above.gnd. : num 281 221 172 259 177 ...
## $ Wind.Speed.daily.mean..80.m.above.gnd. : num 11.61 11.64 6.17 12.64 14.2 ...
## $ Wind.Direction.daily.mean..80.m.above.gnd. : num 280 228 194 281 173 ...
## $ Wind.Speed.daily.mean..900.mb. : num 14.6 22.9 10.6 19.9 16.6 ...
## $ Wind.Direction.daily.mean..900.mb. : num 318.3 75.3 167.1 276.3 207.8 ...
## $ Wind.Gust.daily.mean..sfc. : num 12.21 12.76 5.57 16.94 11.99 ...
## $ Temperature.daily.max..2.m.above.gnd. : num 17.6 18.2 25.6 22.6 27.9 ...
## $ Temperature.daily.min..2.m.above.gnd. : num 8.1 10.3 12.3 15.3 15.5 ...
## $ Relative.Humidity.daily.max..2.m.above.gnd. : int 96 96 88 91 89 97 96 96 97 94 ...
## $ Relative.Humidity.daily.min..2.m.above.gnd. : int 61 71 47 54 49 61 78 75 78 69 ...
## $ Mean.Sea.Level.Pressure.daily.max..MSL. : num 1018 1018 1021 1016 1007 ...
## $ Mean.Sea.Level.Pressure.daily.min..MSL. : num 1013 1015 1016 1010 1003 ...
## $ Total.Cloud.Cover.daily.max..sfc. : num 100 100 0 100 100 100 100 100 100 100 ...
## $ Total.Cloud.Cover.daily.min..sfc. : num 1 3.3 0 0.3 3 0 0 0 100 1 ...
## $ High.Cloud.Cover.daily.max..high.cld.lay. : int 100 100 0 100 100 5 100 100 100 100 ...
## $ High.Cloud.Cover.daily.min..high.cld.lay. : int 0 0 0 0 7 0 0 0 0 0 ...
## $ Medium.Cloud.Cover.daily.max..mid.cld.lay. : int 100 100 0 100 100 100 100 100 100 100 ...
## $ Medium.Cloud.Cover.daily.min..mid.cld.lay. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Low.Cloud.Cover.daily.max..low.cld.lay. : int 100 100 0 100 100 100 100 100 100 100 ...
## $ Low.Cloud.Cover.daily.min..low.cld.lay. : int 0 0 0 0 0 0 0 0 100 0 ...
## $ Wind.Speed.daily.max..10.m.above.gnd. : num 14.06 15.26 8.71 16.62 21.65 ...
## $ Wind.Speed.daily.min..10.m.above.gnd. : num 2.41 2.74 0.51 0.51 3.22 0.72 4.68 1.8 3.71 5.59 ...
## $ Wind.Speed.daily.max..80.m.above.gnd. : num 21.6 20.5 18.7 20.9 27.6 ...
## $ Wind.Speed.daily.min..80.m.above.gnd. : num 1.44 3.71 0.72 0.72 2.16 ...
## $ Wind.Speed.daily.max..900.mb. : num 28.8 40.8 22 41.4 25.9 ...
## $ Wind.Speed.daily.min..900.mb. : num 4.39 2.1 0.51 3.6 4.02 ...
## $ Wind.Gust.daily.max..sfc. : num 23.4 24.1 15.8 32.8 27.7 ...
## $ Wind.Gust.daily.min..sfc. : num 3.24 3.24 0.72 0.72 3.24 1.08 5.76 2.88 4.32 8.28 ...
## $ pluie.demain : logi TRUE FALSE TRUE TRUE FALSE TRUE ...
## X Year Month Day
## Min. : 1.0 Min. :2010 Min. : 1.000 Min. : 1.00
## 1st Qu.: 700.5 1st Qu.:2012 1st Qu.: 4.000 1st Qu.: 8.00
## Median :1438.0 Median :2014 Median : 6.000 Median :16.00
## Mean :1446.9 Mean :2014 Mean : 6.493 Mean :15.73
## 3rd Qu.:2183.5 3rd Qu.:2016 3rd Qu.: 9.000 3rd Qu.:23.00
## Max. :2941.0 Max. :2018 Max. :12.000 Max. :31.00
## Hour Minute Temperature.daily.mean..2.m.above.gnd.
## Min. :0 Min. :0 Min. :-7.100
## 1st Qu.:0 1st Qu.:0 1st Qu.: 6.695
## Median :0 Median :0 Median :12.575
## Mean :0 Mean :0 Mean :12.227
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:17.640
## Max. :0 Max. :0 Max. :29.590
## Relative.Humidity.daily.mean..2.m.above.gnd.
## Min. :41.12
## 1st Qu.:65.11
## Median :71.88
## Mean :71.45
## 3rd Qu.:78.42
## Max. :95.25
## Mean.Sea.Level.Pressure.daily.mean..MSL.
## Min. : 978.5
## 1st Qu.:1012.6
## Median :1017.0
## Mean :1017.0
## 3rd Qu.:1021.6
## Max. :1038.6
## Total.Precipitation.daily.sum..sfc. Snowfall.amount.raw.daily.sum..sfc.
## Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.000 1st Qu.: 0.00000
## Median : 0.100 Median : 0.00000
## Mean : 2.213 Mean : 0.06156
## 3rd Qu.: 2.500 3rd Qu.: 0.00000
## Max. :40.300 Max. :13.86000
## Total.Cloud.Cover.daily.mean..sfc.
## Min. : 0.00
## 1st Qu.: 22.20
## Median : 52.88
## Mean : 50.66
## 3rd Qu.: 78.34
## Max. :100.00
## High.Cloud.Cover.daily.mean..high.cld.lay.
## Min. : 0.00
## 1st Qu.: 2.20
## Median :13.23
## Mean :19.75
## 3rd Qu.:32.59
## Max. :94.54
## Medium.Cloud.Cover.daily.mean..mid.cld.lay.
## Min. : 0.000
## 1st Qu.: 2.678
## Median : 23.770
## Mean : 31.284
## 3rd Qu.: 54.710
## Max. :100.000
## Low.Cloud.Cover.daily.mean..low.cld.lay.
## Min. : 0.00
## 1st Qu.: 8.49
## Median : 38.70
## Mean : 39.68
## 3rd Qu.: 63.58
## Max. :100.00
## Sunshine.Duration.daily.sum..sfc. Shortwave.Radiation.daily.sum..sfc.
## Min. : 0.0 Min. : 283.9
## 1st Qu.: 118.4 1st Qu.:2059.5
## Median : 371.5 Median :3576.0
## Mean : 377.4 Mean :3945.7
## 3rd Qu.: 591.9 3rd Qu.:5730.9
## Max. :1022.2 Max. :8337.5
## Wind.Speed.daily.mean..10.m.above.gnd.
## Min. : 2.020
## 1st Qu.: 6.567
## Median : 9.280
## Mean :10.869
## 3rd Qu.:13.400
## Max. :43.750
## Wind.Direction.daily.mean..10.m.above.gnd.
## Min. : 34.51
## 1st Qu.:155.87
## Median :208.99
## Mean :202.09
## 3rd Qu.:251.75
## Max. :337.65
## Wind.Speed.daily.mean..80.m.above.gnd.
## Min. : 2.110
## 1st Qu.: 8.738
## Median :12.375
## Mean :14.393
## 3rd Qu.:17.887
## Max. :54.620
## Wind.Direction.daily.mean..80.m.above.gnd. Wind.Speed.daily.mean..900.mb.
## Min. : 36.64 Min. : 2.38
## 1st Qu.:158.84 1st Qu.:13.39
## Median :215.37 Median :20.50
## Mean :207.04 Mean :25.05
## 3rd Qu.:257.23 3rd Qu.:31.87
## Max. :342.42 Max. :97.69
## Wind.Direction.daily.mean..900.mb. Wind.Gust.daily.mean..sfc.
## Min. : 29.22 Min. : 2.710
## 1st Qu.:149.22 1st Qu.: 9.643
## Median :237.40 Median :13.835
## Mean :208.69 Mean :16.791
## 3rd Qu.:265.66 3rd Qu.:21.210
## Max. :343.97 Max. :70.020
## Temperature.daily.max..2.m.above.gnd.
## Min. :-4.39
## 1st Qu.:10.65
## Median :16.70
## Mean :16.48
## 3rd Qu.:22.45
## Max. :36.22
## Temperature.daily.min..2.m.above.gnd.
## Min. :-11.650
## 1st Qu.: 2.998
## Median : 8.255
## Mean : 8.051
## 3rd Qu.: 13.072
## Max. : 23.360
## Relative.Humidity.daily.max..2.m.above.gnd.
## Min. : 55.0
## 1st Qu.: 83.0
## Median : 89.0
## Mean : 87.9
## 3rd Qu.: 94.0
## Max. :100.0
## Relative.Humidity.daily.min..2.m.above.gnd.
## Min. :21.0
## 1st Qu.:45.0
## Median :54.0
## Mean :54.2
## 3rd Qu.:63.0
## Max. :93.0
## Mean.Sea.Level.Pressure.daily.max..MSL.
## Min. : 981.4
## 1st Qu.:1015.7
## Median :1019.5
## Mean :1019.8
## 3rd Qu.:1024.4
## Max. :1041.8
## Mean.Sea.Level.Pressure.daily.min..MSL. Total.Cloud.Cover.daily.max..sfc.
## Min. : 976.1 Min. : 0.00
## 1st Qu.:1009.4 1st Qu.:100.00
## Median :1014.4 Median :100.00
## Mean :1014.2 Mean : 89.08
## 3rd Qu.:1019.3 3rd Qu.:100.00
## Max. :1037.4 Max. :100.00
## Total.Cloud.Cover.daily.min..sfc.
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 0.0
## Mean : 8.5
## 3rd Qu.: 2.1
## Max. :100.0
## High.Cloud.Cover.daily.max..high.cld.lay.
## Min. : 0.00
## 1st Qu.: 18.00
## Median :100.00
## Mean : 61.99
## 3rd Qu.:100.00
## Max. :100.00
## High.Cloud.Cover.daily.min..high.cld.lay.
## Min. : 0.0000
## 1st Qu.: 0.0000
## Median : 0.0000
## Mean : 0.7146
## 3rd Qu.: 0.0000
## Max. :45.0000
## Medium.Cloud.Cover.daily.max..mid.cld.lay.
## Min. : 0.00
## 1st Qu.: 34.75
## Median :100.00
## Mean : 72.83
## 3rd Qu.:100.00
## Max. :100.00
## Medium.Cloud.Cover.daily.min..mid.cld.lay.
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 1.953
## 3rd Qu.: 0.000
## Max. :100.000
## Low.Cloud.Cover.daily.max..low.cld.lay.
## Min. : 0.00
## 1st Qu.:100.00
## Median :100.00
## Mean : 80.85
## 3rd Qu.:100.00
## Max. :100.00
## Low.Cloud.Cover.daily.min..low.cld.lay.
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 4.551
## 3rd Qu.: 0.000
## Max. :100.000
## Wind.Speed.daily.max..10.m.above.gnd.
## Min. : 4.61
## 1st Qu.:12.31
## Median :17.33
## Mean :19.22
## 3rd Qu.:23.88
## Max. :69.42
## Wind.Speed.daily.min..10.m.above.gnd.
## Min. : 0.000
## 1st Qu.: 1.080
## Median : 2.310
## Mean : 3.681
## 3rd Qu.: 4.800
## Max. :36.040
## Wind.Speed.daily.max..80.m.above.gnd.
## Min. : 4.84
## 1st Qu.:18.19
## Median :23.82
## Mean :25.45
## 3rd Qu.:29.92
## Max. :79.78
## Wind.Speed.daily.min..80.m.above.gnd. Wind.Speed.daily.max..900.mb.
## Min. : 0.000 Min. : 6.13
## 1st Qu.: 1.125 1st Qu.: 25.18
## Median : 2.550 Median : 37.94
## Mean : 4.901 Mean : 42.26
## 3rd Qu.: 6.130 3rd Qu.: 55.51
## Max. :46.130 Max. :124.20
## Wind.Speed.daily.min..900.mb. Wind.Gust.daily.max..sfc.
## Min. : 0.00 Min. : 5.04
## 1st Qu.: 3.26 1st Qu.:19.08
## Median : 7.15 Median :25.92
## Mean :11.37 Mean :29.49
## 3rd Qu.:14.69 3rd Qu.:36.36
## Max. :82.07 Max. :97.92
## Wind.Gust.daily.min..sfc. pluie.demain
## Min. : 0.000 Mode :logical
## 1st Qu.: 2.160 FALSE:625
## Median : 3.960 TRUE :619
## Mean : 6.655
## 3rd Qu.: 8.640
## Max. :51.120
## [1] "Part des jours de pluie"
##
## FALSE TRUE
## 0.5024116 0.4975884
< En première analyse, la pression atmosphérique et la vitesse du vent pourraient nous aider à prédire la survenue de la pluie.
# For every column from the 2nd up to ncol(train) - 2, draw that column
# against Date as a scatter plot coloured by pluie.demain and print it.
# `p` and `i` are deliberately left in the workspace; a later chunk calls
# rm(p, q, i).
for (i in 2:(ncol(train) - 2)) {
  p <- ggplot(
    data = train,
    mapping = aes(x = Date, y = train[, i], col = pluie.demain)
  ) +
    geom_point() +
    labs(x = "Date", y = colnames(train[i]))
  print(p)
}
< Au regard de leur distribution, la couverture nuageuse pourrait aussi expliquer la survenue de la pluie. La direction du vent, les rafales de vent ou l’humidité relative pourraient aussi jouer un rôle.
# For every column from the 2nd up to ncol(train) - 3, print two views of its
# distribution split by pluie.demain:
#   p: a lattice histogram (percent scale, 10 breaks), one panel per outcome;
#   q: overlaid ggplot2 density curves, filled by outcome.
for (i in 2:(ncol(train) - 3)) {
  p <- histogram(
    ~ train[, i] | pluie.demain,
    data = train,
    type = "percent",
    col = "grey",
    xlab = colnames(train[i]),
    breaks = 10
  )
  q <- ggplot(train, aes(x = train[, i], fill = pluie.demain)) +
    labs(
      title = paste("Distribution de la variable", colnames(train[i]), sep = " "),
      x = colnames(train[i]),
      y = "Fréquence",
      fill = "Couleur",
      subtitle = "Histogramme de distribution"
    ) +
    geom_density(alpha = 0.4) + # semi-transparent fills so both curves stay visible
    guides(fill = guide_legend(override.aes = list(alpha = 1)))
  print(p)
  print(q)
}
# Drop the loop temporaries from the workspace.
rm(p, q, i)
< Les coefficients de corrélation sont élevés et positifs entre variables mesurant force du vent et rafales. < Les corrélations linéaires sont négatives entre, d’une part, les minutes d’ensoleillement ou le rayonnement solaire et d’autre part, la nébulosité. < Il faut s’attendre à des problèmes de colinéarité.
# Correlation plot of columns 2 .. ncol(train) - 2: the columns are converted
# to a numeric matrix, their correlation matrix is drawn (lower triangle,
# variables ordered by hierarchical clustering), and the temporary matrix is
# removed afterwards.
mtrain <- data.matrix(train[, 2:(ncol(train) - 2)])
corrplot(
  cor(mtrain),
  type = "lower",
  order = "hclust",
  tl.col = "black",
  tl.srt = 3,
  tl.cex = 0.55
)
rm(mtrain)
## Pr(>|z|)
## (Intercept)                                4.42e-13 ***
## Mean.Sea.Level.Pressure.daily.min..MSL.    0.054997 .
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000293 ***
## Wind.Direction.daily.mean..900.mb.         3.81e-05 ***
## Mois02                                     0.000729 ***
## Mois03                                     0.001700 **
## Mois04                                     8.62e-05 ***
## Mois05                                     0.066416 .
## Mois06                                     0.363604
## Mois07                                     0.111875
## Mois08                                     0.012401 *
## Mois09                                     0.004433 **
## Mois10                                     0.002350 **
## Mois11                                     0.002824 **
## Mois12                                     0.206446
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.056760 .
## Mean.Sea.Level.Pressure.daily.max..MSL.    0.018443 *
## High.Cloud.Cover.daily.min..high.cld.lay.  0.043751 *
## Temperature.daily.max..2.m.above.gnd.      0.000492 ***
## Temperature.daily.min..2.m.above.gnd.      0.002182 **
## Total.Cloud.Cover.daily.min..sfc.          0.003122 **
## Low.Cloud.Cover.daily.min..low.cld.lay.    0.018938 *
## Mean.Sea.Level.Pressure.daily.mean..MSL.   0.120894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 897.39 on 809 degrees of freedom
## AIC: 943.39
##
## Number of Fisher Scoring iterations: 4
```
# Deviance analysis for model 1.
#
# The deviances and degrees of freedom used here are the ones reported in the
# model summary printed just above (null deviance 1152.01 on 831 df, residual
# deviance 897.39 on 809 df). The previous hard-coded values (1226.68, 942.68,
# 884, 865, 942.53) did not match that summary.
# NOTE(review): the fitted object for model 1 is not visible in this part of
# the file; ideally read these numbers from it (deviance(), df.residual(),
# $null.deviance, $df.null) so they cannot drift again.

# Test 1: null model against model 1 (likelihood-ratio test on the drop in
# deviance).
print("Test du rapport de vraisemblance du modèle nul contre le modèle 1")
## [1] "Test du rapport de vraisemblance du modèle nul contre le modèle 1"
p_nul_m1 <- pchisq(1152.01 - 897.39, 831 - 809, lower.tail = FALSE)
p_nul_m1
## Re-knit to refresh this output; the earlier hard-coded call printed
## [1] 3.755128e-49
# A very small p-value: the model without covariates is rejected, so the
# covariates are useful.

# Test 2: model 1 against the saturated model (residual deviance).
# NOTE(review): with one 0/1 outcome per row the chi-square approximation for
# the residual deviance is questionable; treat this p-value as indicative.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 1")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 1"
p_sature_m1 <- pchisq(897.39, 809, lower.tail = FALSE)
p_sature_m1
## Re-knit to refresh this output; the earlier hard-coded call printed
## [1] 0.03385231
# A small p-value here means the model should be improved.
###############################################################################################################
#                                                                                                             #
# Model 2: all available variables, backward stepwise selection using the AIC criterion                       #
#                                                                                                             #
###############################################################################################################
# Start from the logistic regression of pluie.demain on every column of the
# training rows train[d, ] except Date, then let stepAIC() remove terms one at
# a time while the AIC improves. trace = FALSE silences the step-by-step log.
model2 <- stepAIC(
  glm(
    pluie.demain ~ . - Date,
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model2)
##
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. +
## High.Cloud.Cover.daily.mean..high.cld.lay. + Wind.Direction.daily.mean..10.m.above.gnd. +
## Wind.Speed.daily.mean..80.m.above.gnd. + Wind.Direction.daily.mean..80.m.above.gnd. +
## Wind.Speed.daily.mean..900.mb. + Wind.Direction.daily.mean..900.mb. +
## Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.max..MSL. +
## Mean.Sea.Level.Pressure.daily.min..MSL. + Total.Cloud.Cover.daily.min..sfc. +
## High.Cloud.Cover.daily.min..high.cld.lay. + Medium.Cloud.Cover.daily.max..mid.cld.lay. +
## Low.Cloud.Cover.daily.min..low.cld.lay. + Wind.Speed.daily.min..10.m.above.gnd. +
## Wind.Speed.daily.min..900.mb. + Mois, family = binomial(link = "logit"),
## data = train[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3466 -0.8668 -0.3001 0.8803 2.6061
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 107.267765 15.031133 7.136
## Temperature.daily.mean..2.m.above.gnd. 0.239479 0.066819 3.584
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.010413 0.005471 1.903
## Wind.Direction.daily.mean..10.m.above.gnd. 0.012722 0.006345 2.005
## Wind.Speed.daily.mean..80.m.above.gnd. -0.071261 0.025919 -2.749
## Wind.Direction.daily.mean..80.m.above.gnd. -0.012275 0.006622 -1.854
## Wind.Speed.daily.mean..900.mb. 0.034900 0.015659 2.229
## Wind.Direction.daily.mean..900.mb. 0.006376 0.001650 3.864
## Temperature.daily.min..2.m.above.gnd. -0.241822 0.068492 -3.531
## Mean.Sea.Level.Pressure.daily.max..MSL. -0.063072 0.030341 -2.079
## Mean.Sea.Level.Pressure.daily.min..MSL. -0.044340 0.028104 -1.578
## Total.Cloud.Cover.daily.min..sfc. 0.026575 0.009035 2.941
## High.Cloud.Cover.daily.min..high.cld.lay. -0.052954 0.028783 -1.840
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.009602 0.002671 3.595
## Low.Cloud.Cover.daily.min..low.cld.lay. -0.022741 0.009623 -2.363
## Wind.Speed.daily.min..10.m.above.gnd. 0.086899 0.037099 2.342
## Wind.Speed.daily.min..900.mb. -0.032693 0.017435 -1.875
## Mois02 -1.526803 0.449486 -3.397
## Mois03 -1.506890 0.471192 -3.198
## Mois04 -1.949896 0.505119 -3.860
## Mois05 -1.099664 0.527470 -2.085
## Mois06 -0.680814 0.566494 -1.202
## Mois07 -1.043724 0.601652 -1.735
## Mois08 -1.595593 0.593558 -2.688
## Mois09 -1.638347 0.550473 -2.976
## Mois10 -1.416564 0.459614 -3.082
## Mois11 -1.374426 0.447773 -3.069
## Mois12 -0.465689 0.444194 -1.048
## Pr(>|z|)
## (Intercept) 9.58e-13 ***
## Temperature.daily.mean..2.m.above.gnd. 0.000338 ***
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.057000 .
## Wind.Direction.daily.mean..10.m.above.gnd. 0.044961 *
## Wind.Speed.daily.mean..80.m.above.gnd. 0.005970 **
## Wind.Direction.daily.mean..80.m.above.gnd. 0.063810 .
## Wind.Speed.daily.mean..900.mb. 0.025829 *
## Wind.Direction.daily.mean..900.mb. 0.000112 ***
## Temperature.daily.min..2.m.above.gnd. 0.000415 ***
## Mean.Sea.Level.Pressure.daily.max..MSL. 0.037639 *
## Mean.Sea.Level.Pressure.daily.min..MSL. 0.114626
## Total.Cloud.Cover.daily.min..sfc. 0.003268 **
## High.Cloud.Cover.daily.min..high.cld.lay. 0.065801 .
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000325 ***
## Low.Cloud.Cover.daily.min..low.cld.lay. 0.018115 *
## Wind.Speed.daily.min..10.m.above.gnd. 0.019162 *
## Wind.Speed.daily.min..900.mb. 0.060780 .
## Mois02 0.000682 ***
## Mois03 0.001384 **
## Mois04 0.000113 ***
## Mois05 0.037088 *
## Mois06 0.229440
## Mois07 0.082782 .
## Mois08 0.007184 **
## Mois09 0.002918 **
## Mois10 0.002056 **
## Mois11 0.002144 **
## Mois12 0.294459
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 886.83 on 804 degrees of freedom
## AIC: 942.83
##
## Number of Fisher Scoring iterations: 4
# Model 2 against the saturated model: upper tail of the chi-square
# distribution at the residual deviance, with the residual degrees of freedom.
# Both values are read from the fitted object; the previous hard-coded inputs
# (919.25 on 859 df) did not match the model2 summary printed above
# (886.83 on 804 df).
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 2")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 2"
pchisq(deviance(model2), df.residual(model2), lower.tail = FALSE)
## Re-knit to refresh this output; the earlier hard-coded call printed
## [1] 0.0753681
# A small p-value here means the model can probably still be improved.
###############################################################################################################
#                                                                                                             #
# Model 3: threshold dummies added, backward stepwise selection using the AIC criterion                       #
#          (thresholds chosen from the distribution plots)                                                    #
###############################################################################################################
# Same starting point as a full model on train[d, ] (every column except
# Date), extended with logical indicator terms built with I(); stepAIC() then
# removes terms one at a time while the AIC improves.
model3 <- stepAIC(
  glm(
    pluie.demain ~ . - Date +
      I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015) +
      I(Total.Precipitation.daily.sum..sfc. > 1) +
      I(Snowfall.amount.raw.daily.sum..sfc. > 1) +
      I(Total.Cloud.Cover.daily.mean..sfc. > 50) +
      I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15) +
      I(Sunshine.Duration.daily.sum..sfc. < 250) +
      I(Wind.Direction.daily.mean..10.m.above.gnd. > 150) +
      I(Wind.Speed.daily.mean..900.mb. < 15),
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model3)
##
## Call:
## glm(formula = pluie.demain ~ Total.Cloud.Cover.daily.mean..sfc. +
## High.Cloud.Cover.daily.mean..high.cld.lay. + Low.Cloud.Cover.daily.mean..low.cld.lay. +
## Wind.Direction.daily.mean..10.m.above.gnd. + Wind.Speed.daily.mean..80.m.above.gnd. +
## Wind.Direction.daily.mean..80.m.above.gnd. + Wind.Speed.daily.mean..900.mb. +
## Wind.Direction.daily.mean..900.mb. + Temperature.daily.max..2.m.above.gnd. +
## Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.max..MSL. +
## Total.Cloud.Cover.daily.min..sfc. + High.Cloud.Cover.daily.min..high.cld.lay. +
## Medium.Cloud.Cover.daily.max..mid.cld.lay. + Low.Cloud.Cover.daily.min..low.cld.lay. +
## Wind.Speed.daily.min..10.m.above.gnd. + Wind.Speed.daily.min..900.mb. +
## Mois + I(Sunshine.Duration.daily.sum..sfc. < 250), family = binomial(link = "logit"),
## data = train[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6053 -0.8678 -0.2845 0.8925 2.4198
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 107.278492 15.243286
## Total.Cloud.Cover.daily.mean..sfc. -0.021589 0.010224
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.013312 0.006478
## Low.Cloud.Cover.daily.mean..low.cld.lay. 0.017451 0.008261
## Wind.Direction.daily.mean..10.m.above.gnd. 0.013548 0.006449
## Wind.Speed.daily.mean..80.m.above.gnd. -0.073401 0.025705
## Wind.Direction.daily.mean..80.m.above.gnd. -0.012960 0.006702
## Wind.Speed.daily.mean..900.mb. 0.037726 0.015500
## Wind.Direction.daily.mean..900.mb. 0.006859 0.001661
## Temperature.daily.max..2.m.above.gnd. 0.160613 0.044898
## Temperature.daily.min..2.m.above.gnd. -0.170790 0.048569
## Mean.Sea.Level.Pressure.daily.max..MSL. -0.107728 0.014841
## Total.Cloud.Cover.daily.min..sfc. 0.024680 0.009438
## High.Cloud.Cover.daily.min..high.cld.lay. -0.049467 0.028970
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.012472 0.003045
## Low.Cloud.Cover.daily.min..low.cld.lay. -0.024118 0.009856
## Wind.Speed.daily.min..10.m.above.gnd. 0.087502 0.037367
## Wind.Speed.daily.min..900.mb. -0.034248 0.017190
## Mois02 -1.486047 0.451934
## Mois03 -1.536682 0.479477
## Mois04 -1.852143 0.508465
## Mois05 -1.010525 0.533413
## Mois06 -0.564789 0.567759
## Mois07 -0.935005 0.599852
## Mois08 -1.498265 0.595950
## Mois09 -1.548971 0.552013
## Mois10 -1.348912 0.465607
## Mois11 -1.347134 0.450773
## Mois12 -0.482738 0.445234
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 0.573656 0.287425
## z value Pr(>|z|)
## (Intercept) 7.038 1.95e-12 ***
## Total.Cloud.Cover.daily.mean..sfc. -2.112 0.034713 *
## High.Cloud.Cover.daily.mean..high.cld.lay. 2.055 0.039895 *
## Low.Cloud.Cover.daily.mean..low.cld.lay. 2.112 0.034653 *
## Wind.Direction.daily.mean..10.m.above.gnd. 2.101 0.035672 *
## Wind.Speed.daily.mean..80.m.above.gnd. -2.856 0.004296 **
## Wind.Direction.daily.mean..80.m.above.gnd. -1.934 0.053132 .
## Wind.Speed.daily.mean..900.mb. 2.434 0.014937 *
## Wind.Direction.daily.mean..900.mb. 4.129 3.64e-05 ***
## Temperature.daily.max..2.m.above.gnd. 3.577 0.000347 ***
## Temperature.daily.min..2.m.above.gnd. -3.516 0.000437 ***
## Mean.Sea.Level.Pressure.daily.max..MSL. -7.259 3.91e-13 ***
## Total.Cloud.Cover.daily.min..sfc. 2.615 0.008920 **
## High.Cloud.Cover.daily.min..high.cld.lay. -1.708 0.087724 .
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 4.096 4.20e-05 ***
## Low.Cloud.Cover.daily.min..low.cld.lay. -2.447 0.014409 *
## Wind.Speed.daily.min..10.m.above.gnd. 2.342 0.019196 *
## Wind.Speed.daily.min..900.mb. -1.992 0.046338 *
## Mois02 -3.288 0.001008 **
## Mois03 -3.205 0.001351 **
## Mois04 -3.643 0.000270 ***
## Mois05 -1.894 0.058165 .
## Mois06 -0.995 0.319849
## Mois07 -1.559 0.119061
## Mois08 -2.514 0.011934 *
## Mois09 -2.806 0.005015 **
## Mois10 -2.897 0.003766 **
## Mois11 -2.988 0.002804 **
## Mois12 -1.084 0.278260
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 1.996 0.045950 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 881.35 on 802 degrees of freedom
## AIC: 941.35
##
## Number of Fisher Scoring iterations: 5
# Model 3 against the saturated model: upper tail of the chi-square
# distribution at the residual deviance, with the residual degrees of freedom.
# Both values are read from the fitted object; the previous hard-coded inputs
# (914.85 on 857 df) did not match the model3 summary printed above
# (881.35 on 802 df).
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 3")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 3"
pchisq(deviance(model3), df.residual(model3), lower.tail = FALSE)
## Re-knit to refresh this output; the earlier hard-coded call printed
## [1] 0.08331966
# Compare this p-value with the one obtained for model 2 to judge whether the
# dummies improved the fit.
###############################################################################################################
#                                                                                                             #
# Model 4: same candidate terms, but stepwise selection in both directions using the AIC criterion            #
#                                                                                                             #
###############################################################################################################
# Upper scope for the search: every column selected by `predicteurs`, the
# month factor Mois, and the threshold dummies.
# names(train[predicteurs]) keeps the data-frame shape even when a single
# column is selected, so names() never returns NULL.
# NOTE(review): unlike model 3, the snowfall dummy here is `< 1` (not `> 1`)
# and the wind-direction dummy is absent; confirm this is intended.
formule4 <- as.formula(paste(
  "pluie.demain ~",
  paste(
    c(
      names(train[predicteurs]),
      "Mois",
      "I(Total.Precipitation.daily.sum..sfc. > 1)",
      "I(Snowfall.amount.raw.daily.sum..sfc. < 1)",
      "I(Total.Cloud.Cover.daily.mean..sfc. > 50)",
      "I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)",
      "I(Sunshine.Duration.daily.sum..sfc. < 250)",
      "I(Wind.Speed.daily.mean..900.mb. < 15)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015)"
    ),
    collapse = " + "
  )
))
# The search starts from the intercept-only model and may add or drop terms at
# each step (direction = "both"), never going beyond formule4.
# The test label that used to be printed here, before the model existed, was
# removed: the same label is printed next to the actual test after the summary.
model4 <- stepAIC(
  glm(pluie.demain ~ 1, data = train[d, ], family = binomial(link = "logit")),
  direction = "both",
  scope = list(upper = formule4),
  trace = FALSE
)
summary(model4)
##
## Call:
## glm(formula = pluie.demain ~ Mean.Sea.Level.Pressure.daily.min..MSL. +
## Medium.Cloud.Cover.daily.max..mid.cld.lay. + Wind.Direction.daily.mean..900.mb. +
## Mois + I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15) +
## Mean.Sea.Level.Pressure.daily.max..MSL. + Mean.Sea.Level.Pressure.daily.mean..MSL. +
## I(Sunshine.Duration.daily.sum..sfc. < 250) + Total.Cloud.Cover.daily.mean..sfc. +
## Total.Cloud.Cover.daily.min..sfc. + Low.Cloud.Cover.daily.min..low.cld.lay. +
## High.Cloud.Cover.daily.min..high.cld.lay., family = binomial(link = "logit"),
## data = train[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3589 -0.8674 -0.3324 0.8954 2.3333
##
## Coefficients:
## Estimate
## (Intercept) 108.457661
## Mean.Sea.Level.Pressure.daily.min..MSL. -0.154413
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.010383
## Wind.Direction.daily.mean..900.mb. 0.004311
## Mois02 -1.392205
## Mois03 -1.153179
## Mois04 -1.456125
## Mois05 -0.529835
## Mois06 -0.194013
## Mois07 -0.644206
## Mois08 -1.154428
## Mois09 -1.215988
## Mois10 -1.153747
## Mois11 -1.297263
## Mois12 -0.583244
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE 0.410521
## Mean.Sea.Level.Pressure.daily.max..MSL. -0.166900
## Mean.Sea.Level.Pressure.daily.mean..MSL. 0.214020
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 0.655360
## Total.Cloud.Cover.daily.mean..sfc. -0.013517
## Total.Cloud.Cover.daily.min..sfc. 0.026176
## Low.Cloud.Cover.daily.min..low.cld.lay. -0.024667
## High.Cloud.Cover.daily.min..high.cld.lay. -0.041816
## Std. Error z value
## (Intercept) 14.642710 7.407
## Mean.Sea.Level.Pressure.daily.min..MSL. 0.075055 -2.057
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.002866 3.623
## Wind.Direction.daily.mean..900.mb. 0.001233 3.497
## Mois02 0.441715 -3.152
## Mois03 0.443031 -2.603
## Mois04 0.435937 -3.340
## Mois05 0.414463 -1.278
## Mois06 0.413740 -0.469
## Mois07 0.421459 -1.529
## Mois08 0.417413 -2.766
## Mois09 0.435429 -2.793
## Mois10 0.404233 -2.854
## Mois11 0.426787 -3.040
## Mois12 0.433197 -1.346
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE 0.196005 2.094
## Mean.Sea.Level.Pressure.daily.max..MSL. 0.074909 -2.228
## Mean.Sea.Level.Pressure.daily.mean..MSL. 0.139011 1.540
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 0.277350 2.363
## Total.Cloud.Cover.daily.mean..sfc. 0.005338 -2.532
## Total.Cloud.Cover.daily.min..sfc. 0.009237 2.834
## Low.Cloud.Cover.daily.min..low.cld.lay. 0.009471 -2.605
## High.Cloud.Cover.daily.min..high.cld.lay. 0.026467 -1.580
## Pr(>|z|)
## (Intercept) 1.29e-13 ***
## Mean.Sea.Level.Pressure.daily.min..MSL. 0.039654 *
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000291 ***
## Wind.Direction.daily.mean..900.mb. 0.000471 ***
## Mois02 0.001623 **
## Mois03 0.009243 **
## Mois04 0.000837 ***
## Mois05 0.201121
## Mois06 0.639123
## Mois07 0.126385
## Mois08 0.005680 **
## Mois09 0.005228 **
## Mois10 0.004315 **
## Mois11 0.002369 **
## Mois12 0.178182
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE 0.036220 *
## Mean.Sea.Level.Pressure.daily.max..MSL. 0.025879 *
## Mean.Sea.Level.Pressure.daily.mean..MSL. 0.123661
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 0.018131 *
## Total.Cloud.Cover.daily.mean..sfc. 0.011331 *
## Total.Cloud.Cover.daily.min..sfc. 0.004598 **
## Low.Cloud.Cover.daily.min..low.cld.lay. 0.009199 **
## High.Cloud.Cover.daily.min..high.cld.lay. 0.114117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 903.21 on 809 degrees of freedom
## AIC: 949.21
##
## Number of Fisher Scoring iterations: 4
# Model 4 against the saturated model: upper tail of the chi-square
# distribution at the residual deviance, with the residual degrees of freedom.
# Both values are read from the fitted object; the previous inputs
# (942.53 on 865 df) were the ones used for model 1 and did not match the
# model4 summary printed above (903.21 on 809 df).
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 4")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 4"
pchisq(deviance(model4), df.residual(model4), lower.tail = FALSE)
## Re-knit to refresh this output; the earlier hard-coded call printed
## [1] 0.03385231
###############################################################################################################
#                                                                                                             #
# Model 5: backward stepwise selection using the AIC criterion                                                #
#          with squared terms, interactions between variables and threshold dummies                           #
###############################################################################################################
# Full model on train[d, ] (every column except Date) extended with:
#   - squared terms, each wrapped in I() so that `^` is arithmetic; inside a
#     formula a bare `x^2` is formula crossing and reduces to `x`, which is why
#     the wind-speed square was previously never offered to the selection;
#   - two pairwise interactions (wind speed x wind direction at 900 mb,
#     relative humidity x mean temperature);
#   - logical threshold dummies.
# stepAIC() then removes terms one at a time while the AIC improves.
# NOTE(review): because one more candidate term is now available, the selected
# model may differ from the summary recorded below; re-knit to refresh it.
model5 <- stepAIC(
  glm(
    pluie.demain ~ . - Date +
      I(Temperature.daily.mean..2.m.above.gnd.^2) +
      I(Wind.Speed.daily.mean..900.mb.^2) +
      I(Total.Cloud.Cover.daily.mean..sfc.^2) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
      I(Total.Precipitation.daily.sum..sfc.^2) +
      Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. +
      Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd. +
      I(Total.Precipitation.daily.sum..sfc. > 1) +
      I(Snowfall.amount.raw.daily.sum..sfc. < 1) +
      I(Total.Cloud.Cover.daily.mean..sfc. > 50) +
      I(Sunshine.Duration.daily.sum..sfc. < 250) +
      I(Wind.Speed.daily.mean..900.mb. < 15) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015),
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model5)
##
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. +
## Relative.Humidity.daily.mean..2.m.above.gnd. + Total.Precipitation.daily.sum..sfc. +
## Total.Cloud.Cover.daily.mean..sfc. + High.Cloud.Cover.daily.mean..high.cld.lay. +
## Low.Cloud.Cover.daily.mean..low.cld.lay. + Wind.Speed.daily.mean..10.m.above.gnd. +
## Wind.Direction.daily.mean..10.m.above.gnd. + Wind.Direction.daily.mean..80.m.above.gnd. +
## Wind.Speed.daily.mean..900.mb. + Wind.Direction.daily.mean..900.mb. +
## Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.min..MSL. +
## Total.Cloud.Cover.daily.min..sfc. + High.Cloud.Cover.daily.min..high.cld.lay. +
## Medium.Cloud.Cover.daily.max..mid.cld.lay. + Low.Cloud.Cover.daily.min..low.cld.lay. +
## Wind.Speed.daily.min..10.m.above.gnd. + Wind.Speed.daily.min..900.mb. +
## Mois + I(Temperature.daily.mean..2.m.above.gnd.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
## I(Total.Precipitation.daily.sum..sfc.^2) + I(Total.Precipitation.daily.sum..sfc. >
## 1) + I(Sunshine.Duration.daily.sum..sfc. < 250) + Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.,
## family = binomial(link = "logit"), data = train[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5470 -0.8390 -0.2663 0.8719 2.2417
##
## Coefficients:
## Estimate
## (Intercept) 8.133e+01
## Temperature.daily.mean..2.m.above.gnd. -1.557e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. -5.178e-02
## Total.Precipitation.daily.sum..sfc. -1.300e-01
## Total.Cloud.Cover.daily.mean..sfc. -1.937e-02
## High.Cloud.Cover.daily.mean..high.cld.lay. 1.463e-02
## Low.Cloud.Cover.daily.mean..low.cld.lay. 1.767e-02
## Wind.Speed.daily.mean..10.m.above.gnd. -1.201e-01
## Wind.Direction.daily.mean..10.m.above.gnd. 1.515e-02
## Wind.Direction.daily.mean..80.m.above.gnd. -1.291e-02
## Wind.Speed.daily.mean..900.mb. 3.597e-02
## Wind.Direction.daily.mean..900.mb. 6.610e-03
## Temperature.daily.min..2.m.above.gnd. -2.974e-01
## Mean.Sea.Level.Pressure.daily.min..MSL. -4.810e-02
## Total.Cloud.Cover.daily.min..sfc. 2.773e-02
## High.Cloud.Cover.daily.min..high.cld.lay. -7.282e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 1.132e-02
## Low.Cloud.Cover.daily.min..low.cld.lay. -2.523e-02
## Wind.Speed.daily.min..10.m.above.gnd. 1.041e-01
## Wind.Speed.daily.min..900.mb. -2.986e-02
## Mois02 -1.889e+00
## Mois03 -1.590e+00
## Mois04 -1.806e+00
## Mois05 -1.142e+00
## Mois06 -8.745e-01
## Mois07 -1.423e+00
## Mois08 -2.066e+00
## Mois09 -1.718e+00
## Mois10 -1.323e+00
## Mois11 -1.344e+00
## Mois12 -5.558e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2) 7.794e-03
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -2.950e-05
## I(Total.Precipitation.daily.sum..sfc.^2) 5.513e-03
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE 5.483e-01
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 5.188e-01
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 3.843e-03
## Std. Error
## (Intercept) 1.784e+01
## Temperature.daily.mean..2.m.above.gnd. 1.353e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. 2.426e-02
## Total.Precipitation.daily.sum..sfc. 7.583e-02
## Total.Cloud.Cover.daily.mean..sfc. 1.061e-02
## High.Cloud.Cover.daily.mean..high.cld.lay. 6.765e-03
## Low.Cloud.Cover.daily.mean..low.cld.lay. 8.816e-03
## Wind.Speed.daily.mean..10.m.above.gnd. 3.733e-02
## Wind.Direction.daily.mean..10.m.above.gnd. 6.534e-03
## Wind.Direction.daily.mean..80.m.above.gnd. 6.712e-03
## Wind.Speed.daily.mean..900.mb. 1.649e-02
## Wind.Direction.daily.mean..900.mb. 1.721e-03
## Temperature.daily.min..2.m.above.gnd. 8.099e-02
## Mean.Sea.Level.Pressure.daily.min..MSL. 2.917e-02
## Total.Cloud.Cover.daily.min..sfc. 9.976e-03
## High.Cloud.Cover.daily.min..high.cld.lay. 3.419e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 3.112e-03
## Low.Cloud.Cover.daily.min..low.cld.lay. 1.036e-02
## Wind.Speed.daily.min..10.m.above.gnd. 3.937e-02
## Wind.Speed.daily.min..900.mb. 1.808e-02
## Mois02 4.796e-01
## Mois03 5.298e-01
## Mois04 5.859e-01
## Mois05 6.031e-01
## Mois06 6.379e-01
## Mois07 6.728e-01
## Mois08 6.683e-01
## Mois09 6.172e-01
## Mois10 5.051e-01
## Mois11 4.669e-01
## Mois12 4.466e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2) 2.012e-03
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 1.523e-05
## I(Total.Precipitation.daily.sum..sfc.^2) 3.280e-03
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE 3.140e-01
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 2.921e-01
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 1.510e-03
## z value
## (Intercept) 4.558
## Temperature.daily.mean..2.m.above.gnd. -1.151
## Relative.Humidity.daily.mean..2.m.above.gnd. -2.134
## Total.Precipitation.daily.sum..sfc. -1.714
## Total.Cloud.Cover.daily.mean..sfc. -1.826
## High.Cloud.Cover.daily.mean..high.cld.lay. 2.162
## Low.Cloud.Cover.daily.mean..low.cld.lay. 2.004
## Wind.Speed.daily.mean..10.m.above.gnd. -3.218
## Wind.Direction.daily.mean..10.m.above.gnd. 2.319
## Wind.Direction.daily.mean..80.m.above.gnd. -1.923
## Wind.Speed.daily.mean..900.mb. 2.181
## Wind.Direction.daily.mean..900.mb. 3.840
## Temperature.daily.min..2.m.above.gnd. -3.671
## Mean.Sea.Level.Pressure.daily.min..MSL. -1.649
## Total.Cloud.Cover.daily.min..sfc. 2.779
## High.Cloud.Cover.daily.min..high.cld.lay. -2.130
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 3.637
## Low.Cloud.Cover.daily.min..low.cld.lay. -2.434
## Wind.Speed.daily.min..10.m.above.gnd. 2.645
## Wind.Speed.daily.min..900.mb. -1.652
## Mois02 -3.939
## Mois03 -3.001
## Mois04 -3.082
## Mois05 -1.893
## Mois06 -1.371
## Mois07 -2.115
## Mois08 -3.092
## Mois09 -2.784
## Mois10 -2.619
## Mois11 -2.878
## Mois12 -1.244
## I(Temperature.daily.mean..2.m.above.gnd.^2) 3.873
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -1.936
## I(Total.Precipitation.daily.sum..sfc.^2) 1.681
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE 1.746
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 1.776
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 2.545
## Pr(>|z|)
## (Intercept) 5.15e-06
## Temperature.daily.mean..2.m.above.gnd. 0.249792
## Relative.Humidity.daily.mean..2.m.above.gnd. 0.032852
## Total.Precipitation.daily.sum..sfc. 0.086498
## Total.Cloud.Cover.daily.mean..sfc. 0.067887
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.030583
## Low.Cloud.Cover.daily.mean..low.cld.lay. 0.045046
## Wind.Speed.daily.mean..10.m.above.gnd. 0.001291
## Wind.Direction.daily.mean..10.m.above.gnd. 0.020386
## Wind.Direction.daily.mean..80.m.above.gnd. 0.054511
## Wind.Speed.daily.mean..900.mb. 0.029169
## Wind.Direction.daily.mean..900.mb. 0.000123
## Temperature.daily.min..2.m.above.gnd. 0.000241
## Mean.Sea.Level.Pressure.daily.min..MSL. 0.099120
## Total.Cloud.Cover.daily.min..sfc. 0.005448
## High.Cloud.Cover.daily.min..high.cld.lay. 0.033200
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000276
## Low.Cloud.Cover.daily.min..low.cld.lay. 0.014922
## Wind.Speed.daily.min..10.m.above.gnd. 0.008173
## Wind.Speed.daily.min..900.mb. 0.098597
## Mois02 8.19e-05
## Mois03 0.002694
## Mois04 0.002059
## Mois05 0.058314
## Mois06 0.170420
## Mois07 0.034468
## Mois08 0.001988
## Mois09 0.005365
## Mois10 0.008818
## Mois11 0.004001
## Mois12 0.213351
## I(Temperature.daily.mean..2.m.above.gnd.^2) 0.000107
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 0.052831
## I(Total.Precipitation.daily.sum..sfc.^2) 0.092788
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE 0.080801
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 0.075725
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.010940
##
## (Intercept) ***
## Temperature.daily.mean..2.m.above.gnd.
## Relative.Humidity.daily.mean..2.m.above.gnd. *
## Total.Precipitation.daily.sum..sfc. .
## Total.Cloud.Cover.daily.mean..sfc. .
## High.Cloud.Cover.daily.mean..high.cld.lay. *
## Low.Cloud.Cover.daily.mean..low.cld.lay. *
## Wind.Speed.daily.mean..10.m.above.gnd. **
## Wind.Direction.daily.mean..10.m.above.gnd. *
## Wind.Direction.daily.mean..80.m.above.gnd. .
## Wind.Speed.daily.mean..900.mb. *
## Wind.Direction.daily.mean..900.mb. ***
## Temperature.daily.min..2.m.above.gnd. ***
## Mean.Sea.Level.Pressure.daily.min..MSL. .
## Total.Cloud.Cover.daily.min..sfc. **
## High.Cloud.Cover.daily.min..high.cld.lay. *
## Medium.Cloud.Cover.daily.max..mid.cld.lay. ***
## Low.Cloud.Cover.daily.min..low.cld.lay. *
## Wind.Speed.daily.min..10.m.above.gnd. **
## Wind.Speed.daily.min..900.mb. .
## Mois02 ***
## Mois03 **
## Mois04 **
## Mois05 .
## Mois06
## Mois07 *
## Mois08 **
## Mois09 **
## Mois10 **
## Mois11 **
## Mois12
## I(Temperature.daily.mean..2.m.above.gnd.^2) ***
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) .
## I(Total.Precipitation.daily.sum..sfc.^2) .
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE .
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE .
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 859.66 on 795 degrees of freedom
## AIC: 933.66
##
## Number of Fisher Scoring iterations: 5
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 5")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 5"
# Deviance check of model 5 against the saturated model: upper-tail chi-square
# probability of the residual deviance on its residual degrees of freedom.
# Both values are read from the fitted object so they cannot drift away from
# the model actually estimated.
# NOTE(review): the previous call used hard-coded values, pchisq(909.75, 857),
# which returned 0.1029499 and led to the remark "the p-value improves; at a
# 10% level this model is acceptable". Those inputs do not match the summary
# above (residual deviance 859.66 on 795 df), so re-check that conclusion.
pchisq(deviance(model5), df.residual(model5), lower.tail = FALSE)
###############################################################################################################
# #
# Modele 6 : Sélection pas à pas (direction "both") avec le critère AIC, à partir du modèle nul #
# Ajout d'interactions entre variables, et de liaisons quadratiques #
###############################################################################################################
# Upper scope for model 6: the columns selected by `predicteurs`, Mois, the
# squared terms, two interactions and the threshold indicators.
# Fixes relative to the previous string: the wind-speed square is wrapped in
# I() (a bare `x^2` in a formula collapses to `x`), the stray "++" is gone, and
# the terms are listed one per line instead of in a single long literal.
formule6 <- as.formula(paste(
  "pluie.demain ~",
  paste(
    c(
      names(train[, predicteurs]),
      "Mois",
      "I(Temperature.daily.mean..2.m.above.gnd.^2)",
      "I(Wind.Speed.daily.mean..900.mb.^2)",
      "I(Total.Cloud.Cover.daily.mean..sfc.^2)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)",
      "I(Total.Precipitation.daily.sum..sfc.^2)",
      "Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.",
      "Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.",
      "I(Total.Precipitation.daily.sum..sfc.>1)",
      "I(Snowfall.amount.raw.daily.sum..sfc.< 1 )",
      "I(Total.Cloud.Cover.daily.mean..sfc. >50)",
      "I(Sunshine.Duration.daily.sum..sfc.<250)",
      "I(Wind.Speed.daily.mean..900.mb.<15)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015)"
    ),
    collapse = "+"
  )
))
# Stepwise AIC search in both directions, starting from the intercept-only
# model and bounded above by formule6.
# NOTE(review): `- Mois` on an intercept-only formula removes no term --
# presumably a leftover; kept unchanged here.
# NOTE(review): the summary output recorded below predates the I() fix.
model6 <- stepAIC(
  glm(pluie.demain ~ 1 - Mois, data = train[d, ], family = binomial(link = "logit")),
  direction = "both",
  scope = list(upper = formule6),
  trace = FALSE
)
summary(model6)##
## Call:
## glm(formula = pluie.demain ~ Mean.Sea.Level.Pressure.daily.min..MSL. +
## Medium.Cloud.Cover.daily.max..mid.cld.lay. + Wind.Direction.daily.mean..900.mb. +
## Mois + I(Temperature.daily.mean..2.m.above.gnd.^2) + Temperature.daily.min..2.m.above.gnd. +
## I(Sunshine.Duration.daily.sum..sfc. < 250) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
## High.Cloud.Cover.daily.max..high.cld.lay. + Mean.Sea.Level.Pressure.daily.mean..MSL.,
## family = binomial(link = "logit"), data = train[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3316 -0.8797 -0.3044 0.8635 2.2468
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.820e+01 3.699e+01
## Mean.Sea.Level.Pressure.daily.min..MSL. -1.434e-01 7.472e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 7.345e-03 2.973e-03
## Wind.Direction.daily.mean..900.mb. 5.712e-03 1.325e-03
## Mois02 -1.448e+00 4.354e-01
## Mois03 -9.377e-01 4.415e-01
## Mois04 -1.133e+00 4.495e-01
## Mois05 -3.984e-01 4.697e-01
## Mois06 -2.219e-01 5.050e-01
## Mois07 -7.934e-01 5.500e-01
## Mois08 -1.289e+00 5.464e-01
## Mois09 -1.009e+00 5.063e-01
## Mois10 -8.680e-01 4.373e-01
## Mois11 -9.277e-01 4.275e-01
## Mois12 -6.746e-01 4.360e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2) 6.238e-03 1.350e-03
## Temperature.daily.min..2.m.above.gnd. -1.595e-01 4.015e-02
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 5.620e-01 2.003e-01
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -7.961e-05 3.642e-05
## High.Cloud.Cover.daily.max..high.cld.lay. 5.170e-03 2.696e-03
## Mean.Sea.Level.Pressure.daily.mean..MSL. 2.050e-01 1.378e-01
## z value Pr(>|z|)
## (Intercept) 0.492 0.622751
## Mean.Sea.Level.Pressure.daily.min..MSL. -1.919 0.055003 .
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 2.471 0.013484 *
## Wind.Direction.daily.mean..900.mb. 4.309 1.64e-05 ***
## Mois02 -3.325 0.000884 ***
## Mois03 -2.124 0.033673 *
## Mois04 -2.520 0.011747 *
## Mois05 -0.848 0.396373
## Mois06 -0.439 0.660395
## Mois07 -1.442 0.149177
## Mois08 -2.360 0.018293 *
## Mois09 -1.992 0.046398 *
## Mois10 -1.985 0.047127 *
## Mois11 -2.170 0.030005 *
## Mois12 -1.547 0.121796
## I(Temperature.daily.mean..2.m.above.gnd.^2) 4.622 3.80e-06 ***
## Temperature.daily.min..2.m.above.gnd. -3.972 7.11e-05 ***
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE 2.806 0.005010 **
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -2.186 0.028803 *
## High.Cloud.Cover.daily.max..high.cld.lay. 1.918 0.055156 .
## Mean.Sea.Level.Pressure.daily.mean..MSL. 1.487 0.136933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 897.56 on 811 degrees of freedom
## AIC: 939.56
##
## Number of Fisher Scoring iterations: 4
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 6")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 6"
# Deviance check of model 6 against the saturated model, with the residual
# deviance and its degrees of freedom read from the fitted object.
# NOTE(review): the previous call used hard-coded values, pchisq(927.94, 862),
# which returned 0.05875778 ("the p-value remains modest"). Those inputs do not
# match the summary above (residual deviance 897.56 on 811 df).
pchisq(deviance(model6), df.residual(model6), lower.tail = FALSE)
rm(formule, formule4, formule6)
# Les modèles peuvent sembler insuffisants. Une des voies d’amélioration réside dans la discrétisation des variables numériques.
# La discrétisation peut constituer une réponse à l’absence de lien linéaire entre la variable numérique et la variable réponse.
# No obvious relationship with the response variable.
# Drop the temporary objects used for this variable.
rm(q, tempmoy)
###############################################################################################################
# Humidite relative #
###############################################################################################################
# Decile breakpoints of the daily mean relative humidity at 2 m.
q <- quantile(train$Relative.Humidity.daily.mean..2.m.above.gnd.,
              probs = seq(0, 1, by = 0.1))
# The column is referenced by its full name (the trailing dot was missing, so
# the lookup only worked through `$` partial matching). include.lowest = TRUE
# keeps the minimum observation, which the default (a, b] intervals turn into NA.
qhumiditemoy.2m <- cut(train$Relative.Humidity.daily.mean..2.m.above.gnd.,
                       breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(qhumiditemoy.2m, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Relative.Humidity.daily.mean..2.m.above.gnd.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Again, no obvious relationship with the response variable.
rm(q, qhumiditemoy.2m)
###############################################################################################################
# Couverture nuageuse moyenne en pourcentage #
###############################################################################################################
# Decile breakpoints of the daily mean total cloud cover.
q <- quantile(train$Total.Cloud.Cover.daily.mean..sfc.,
              probs = seq(0, 1, by = 0.1))
# include.lowest = TRUE keeps the observations equal to the minimum, which the
# default (a, b] intervals silently turn into NA.
nebulositemoy <- cut(train$Total.Cloud.Cover.daily.mean..sfc.,
                     breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(nebulositemoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Total.Cloud.Cover.daily.mean..sfc.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Copy of the training table that receives the discretised variables.
train2 <- train
# Two-level cloud-cover variable, split at 63%; rows with a missing cover
# value stay NA.
train2$nebulositemoy <- ifelse(
  train$Total.Cloud.Cover.daily.mean..sfc. >= 63,
  "Couvert",
  "Degage ou partiellement couvert"
)
rm(q, nebulositemoy)
###############################################################################################################
# Pression atmospherique au niveau de la mer #
###############################################################################################################
# Decile breakpoints of the daily mean sea-level pressure.
q <- quantile(train$Mean.Sea.Level.Pressure.daily.mean..MSL.,
              probs = seq(0, 1, by = 0.1))
# include.lowest = TRUE keeps the minimum observation, which the default
# (a, b] intervals silently turn into NA.
pressionmoy <- cut(train$Mean.Sea.Level.Pressure.daily.mean..MSL.,
                   breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(pressionmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Mean.Sea.Level.Pressure.daily.mean..MSL.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Two-level pressure variable in train2, split at 1017.
# NOTE(review): the earlier comment announced a 1015 hPa threshold while the
# code has always split at 1017 -- confirm which cut-off is intended.
train2$pressionmoy <- ifelse(
  train$Mean.Sea.Level.Pressure.daily.mean..MSL. >= 1017,
  "Haute pression",
  "Pression basse et moyenne"
)
rm(q, pressionmoy)
###############################################################################################################
# Precipitation moyenne #
###############################################################################################################
# Decile breakpoints of the strictly positive daily precipitation sums.
# The previous pipeline fed a one-column data frame to quantile() and passed
# the raw training column as a stray positional argument; the positive values
# are now selected directly, as is done for the snowfall variable.
q <- quantile(
  train$Total.Precipitation.daily.sum..sfc.[train$Total.Precipitation.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.1)
)
# Full column name (no reliance on `$` partial matching). unique() guards
# against tied deciles, which cut() rejects as non-unique breaks, and
# include.lowest = TRUE keeps the smallest positive value in the first bin.
# Days without precipitation fall below the first break and are left out.
precipmoy <- cut(train$Total.Precipitation.daily.sum..sfc.,
                 breaks = unique(q), include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(precipmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Total.Precipitation.daily.sum..sfc.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Three-level precipitation variable in train2: none, up to 0.2, above 0.2.
train2 <- mutate(
  train2,
  precipmoy = case_when(
    Total.Precipitation.daily.sum..sfc. == 0 ~ "Precipit. nulle",
    Total.Precipitation.daily.sum..sfc. > 0 &
      Total.Precipitation.daily.sum..sfc. <= 0.2 ~ "Precip. faible",
    Total.Precipitation.daily.sum..sfc. > 0.2 ~ "Precip. moy. ou forte"
  )
)
rm(q, precipmoy)
###############################################################################################################
# Chute de neige #
###############################################################################################################
# Quartile breakpoints of the strictly positive daily snowfall amounts.
q <- quantile(
  train$Snowfall.amount.raw.daily.sum..sfc.[train$Snowfall.amount.raw.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.25)
)
# include.lowest = TRUE keeps the smallest positive amount in the first bin
# (the default (a, b] intervals turn it into NA). Days without snow fall below
# the first break and are left out.
neigemoy <- cut(train$Snowfall.amount.raw.daily.sum..sfc.,
                breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(neigemoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 1), las = 3,
        main = "Snowfall.amount.raw.daily.sum..sfc.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Two-level snowfall variable in train2: no snow versus any snow.
train2 <- mutate(
  train2,
  neigemoy = case_when(
    Snowfall.amount.raw.daily.sum..sfc. == 0 ~ "Pas de neige",
    Snowfall.amount.raw.daily.sum..sfc. > 0 ~ "Chute de neige"
  )
)
rm(q, neigemoy)
###############################################################################################################
# Temps d'ensoleillement #
###############################################################################################################
# Decile breakpoints of the strictly positive daily sunshine durations.
q <- quantile(
  train$Sunshine.Duration.daily.sum..sfc.[train$Sunshine.Duration.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.1)
)
# include.lowest = TRUE keeps the smallest positive duration in the first bin
# (the default (a, b] intervals turn it into NA). Days without sunshine fall
# below the first break and are left out.
soleilmoy <- cut(train$Sunshine.Duration.daily.sum..sfc.,
                 breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(soleilmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Sunshine.Duration.daily.sum..sfc.",
        ylab = "Jours de pluie", density = 0)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Three-level sunshine variable in train2: none, up to 328, above 328.
train2 <- mutate(
  train2,
  soleilmoy = case_when(
    Sunshine.Duration.daily.sum..sfc. == 0 ~ "Pas d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 0 &
      Sunshine.Duration.daily.sum..sfc. <= 328 ~ "Peu d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 328 ~ "Ensoleillem."
  )
)
rm(q, soleilmoy)
###############################################################################################################
# Rayonnement solaire #
###############################################################################################################
# Decile breakpoints of the daily shortwave radiation sums.
q <- quantile(train$Shortwave.Radiation.daily.sum..sfc.,
              probs = seq(0, 1, 0.1))
# include.lowest = TRUE keeps the minimum observation, which the default
# (a, b] intervals silently turn into NA.
rayonmoy <- cut(train$Shortwave.Radiation.daily.sum..sfc.,
                breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(rayonmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Shortwave.Radiation.daily.sum..sfc.",
        ylab = "Jours de pluie", density = 0, cex.axis = .5)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Two-level radiation variable in train2, split at 3182.
train2 <- mutate(
  train2,
  rayonmoy = case_when(
    Shortwave.Radiation.daily.sum..sfc. < 3182 ~ "Faible rayonnement.",
    Shortwave.Radiation.daily.sum..sfc. >= 3182 ~ "Rayonnem. eleve"
  )
)
rm(q, rayonmoy)
###############################################################################################################
# Vitesse du vent #
###############################################################################################################
# Decile breakpoints of the daily mean wind speed at 10 m.
# The column is referenced by its full name (the trailing dot was missing, so
# the lookup only worked through `$` partial matching).
q <- quantile(train$Wind.Speed.daily.mean..10.m.above.gnd.,
              probs = seq(0, 1, 0.1))
# include.lowest = TRUE keeps the minimum observation, which the default
# (a, b] intervals silently turn into NA.
ventmoy <- cut(train$Wind.Speed.daily.mean..10.m.above.gnd.,
               breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(ventmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Wind.Speed.daily.mean..10.m.above.gnd",
        ylab = "Jours de pluie", density = 0, cex.axis = .5)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Fairly clear linear relationship: discretising this variable is not a
# relevant option, so nothing is added to train2 here.
rm(q, ventmoy)
###############################################################################################################
# Direction du vent #
###############################################################################################################
# Decile breakpoints of the daily mean wind direction at 10 m.
q <- quantile(train$Wind.Direction.daily.mean..10.m.above.gnd.,
              probs = seq(0, 1, 0.1))
# include.lowest = TRUE keeps the minimum observation, which the default
# (a, b] intervals silently turn into NA.
directventmoy <- cut(train$Wind.Direction.daily.mean..10.m.above.gnd.,
                     breaks = q, include.lowest = TRUE)
# Row-wise proportions of pluie.demain within each bin; the second column is
# plotted (presumably the TRUE level, i.e. rain the next day).
barplot(prop.table(table(directventmoy, train$pluie.demain), 1)[, 2],
        ylim = c(0, 0.8), las = 3,
        main = "Wind.Direction.daily.mean..10.m.above.gnd.",
        ylab = "Jours de pluie",
        density = 0, cex.axis = .5)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean((train$pluie.demain) == TRUE), lty = 2)
# Two-level wind-direction variable in train2, split at 138.
train2 <- mutate(
  train2,
  directventmoy = case_when(
    Wind.Direction.daily.mean..10.m.above.gnd. < 138 ~ "N-SE",
    Wind.Direction.daily.mean..10.m.above.gnd. >= 138 ~ "SE-NO"
  )
)
rm(q, directventmoy)
###############################################################################################################
# Modele 7 #
###############################################################################################################
# Model 7: backward selection with step() on train2[d, ]. The starting model
# uses every column except Date and six raw variables that are removed
# explicitly, then adds squared terms and two interactions.
model7 <- step(
  object = glm(
    formula = pluie.demain ~ . -
      Date -
      Total.Cloud.Cover.daily.mean..sfc. -
      Mean.Sea.Level.Pressure.daily.mean..MSL. -
      Total.Precipitation.daily.sum..sfc. -
      Sunshine.Duration.daily.sum..sfc. -
      Shortwave.Radiation.daily.sum..sfc. -
      Wind.Direction.daily.mean..10.m.above.gnd. +
      I(Temperature.daily.mean..2.m.above.gnd.^2) +
      I(Wind.Speed.daily.mean..900.mb.^2) +
      I(Total.Cloud.Cover.daily.mean..sfc.^2) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
      I(Total.Precipitation.daily.sum..sfc.^2) +
      Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. +
      Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.,
    family = binomial(link = "logit"),
    data = train2[d, ]
  ),
  direction = "backward",
  trace = FALSE
)
summary(model7)##
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. +
## Relative.Humidity.daily.mean..2.m.above.gnd. + High.Cloud.Cover.daily.mean..high.cld.lay. +
## Low.Cloud.Cover.daily.mean..low.cld.lay. + Wind.Speed.daily.mean..10.m.above.gnd. +
## Wind.Direction.daily.mean..900.mb. + Temperature.daily.min..2.m.above.gnd. +
## Mean.Sea.Level.Pressure.daily.min..MSL. + Total.Cloud.Cover.daily.min..sfc. +
## High.Cloud.Cover.daily.min..high.cld.lay. + Medium.Cloud.Cover.daily.max..mid.cld.lay. +
## Low.Cloud.Cover.daily.min..low.cld.lay. + Wind.Speed.daily.min..10.m.above.gnd. +
## Wind.Speed.daily.min..900.mb. + Mois + nebulositemoy + directventmoy +
## I(Temperature.daily.mean..2.m.above.gnd.^2) + I(Wind.Speed.daily.mean..900.mb.^2) +
## I(Total.Cloud.Cover.daily.mean..sfc.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.,
## family = binomial(link = "logit"), data = train2[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5279 -0.8646 -0.2855 0.8477 2.2343
##
## Coefficients:
## Estimate
## (Intercept) 7.741e+01
## Temperature.daily.mean..2.m.above.gnd. -1.320e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. -4.438e-02
## High.Cloud.Cover.daily.mean..high.cld.lay. 1.310e-02
## Low.Cloud.Cover.daily.mean..low.cld.lay. 1.580e-02
## Wind.Speed.daily.mean..10.m.above.gnd. -8.789e-02
## Wind.Direction.daily.mean..900.mb. 5.981e-03
## Temperature.daily.min..2.m.above.gnd. -3.117e-01
## Mean.Sea.Level.Pressure.daily.min..MSL. -4.448e-02
## Total.Cloud.Cover.daily.min..sfc. 2.855e-02
## High.Cloud.Cover.daily.min..high.cld.lay. -4.834e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 9.242e-03
## Low.Cloud.Cover.daily.min..low.cld.lay. -2.201e-02
## Wind.Speed.daily.min..10.m.above.gnd. 8.708e-02
## Wind.Speed.daily.min..900.mb. -3.280e-02
## Mois02 -1.779e+00
## Mois03 -1.392e+00
## Mois04 -1.645e+00
## Mois05 -9.294e-01
## Mois06 -7.301e-01
## Mois07 -1.246e+00
## Mois08 -1.835e+00
## Mois09 -1.530e+00
## Mois10 -1.192e+00
## Mois11 -1.203e+00
## Mois12 -5.214e-01
## nebulositemoyDegage ou partiellement couvert -6.283e-01
## directventmoySE-NO 4.040e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2) 7.906e-03
## I(Wind.Speed.daily.mean..900.mb.^2) 4.330e-04
## I(Total.Cloud.Cover.daily.mean..sfc.^2) -1.789e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -2.895e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 3.542e-03
## Std. Error
## (Intercept) 1.726e+01
## Temperature.daily.mean..2.m.above.gnd. 1.346e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. 2.383e-02
## High.Cloud.Cover.daily.mean..high.cld.lay. 6.432e-03
## Low.Cloud.Cover.daily.mean..low.cld.lay. 8.004e-03
## Wind.Speed.daily.mean..10.m.above.gnd. 3.541e-02
## Wind.Direction.daily.mean..900.mb. 1.491e-03
## Temperature.daily.min..2.m.above.gnd. 7.876e-02
## Mean.Sea.Level.Pressure.daily.min..MSL. 2.856e-02
## Total.Cloud.Cover.daily.min..sfc. 1.029e-02
## High.Cloud.Cover.daily.min..high.cld.lay. 2.968e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 2.764e-03
## Low.Cloud.Cover.daily.min..low.cld.lay. 9.925e-03
## Wind.Speed.daily.min..10.m.above.gnd. 3.807e-02
## Wind.Speed.daily.min..900.mb. 1.758e-02
## Mois02 4.830e-01
## Mois03 5.397e-01
## Mois04 5.899e-01
## Mois05 6.139e-01
## Mois06 6.508e-01
## Mois07 6.798e-01
## Mois08 6.726e-01
## Mois09 6.239e-01
## Mois10 5.216e-01
## Mois11 4.722e-01
## Mois12 4.515e-01
## nebulositemoyDegage ou partiellement couvert 3.772e-01
## directventmoySE-NO 2.604e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2) 1.970e-03
## I(Wind.Speed.daily.mean..900.mb.^2) 2.150e-04
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 1.095e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 1.511e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 1.499e-03
## z value
## (Intercept) 4.485
## Temperature.daily.mean..2.m.above.gnd. -0.980
## Relative.Humidity.daily.mean..2.m.above.gnd. -1.863
## High.Cloud.Cover.daily.mean..high.cld.lay. 2.036
## Low.Cloud.Cover.daily.mean..low.cld.lay. 1.974
## Wind.Speed.daily.mean..10.m.above.gnd. -2.482
## Wind.Direction.daily.mean..900.mb. 4.013
## Temperature.daily.min..2.m.above.gnd. -3.958
## Mean.Sea.Level.Pressure.daily.min..MSL. -1.557
## Total.Cloud.Cover.daily.min..sfc. 2.774
## High.Cloud.Cover.daily.min..high.cld.lay. -1.629
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 3.343
## Low.Cloud.Cover.daily.min..low.cld.lay. -2.218
## Wind.Speed.daily.min..10.m.above.gnd. 2.287
## Wind.Speed.daily.min..900.mb. -1.865
## Mois02 -3.683
## Mois03 -2.579
## Mois04 -2.788
## Mois05 -1.514
## Mois06 -1.122
## Mois07 -1.833
## Mois08 -2.729
## Mois09 -2.452
## Mois10 -2.285
## Mois11 -2.547
## Mois12 -1.155
## nebulositemoyDegage ou partiellement couvert -1.666
## directventmoySE-NO 1.552
## I(Temperature.daily.mean..2.m.above.gnd.^2) 4.013
## I(Wind.Speed.daily.mean..900.mb.^2) 2.014
## I(Total.Cloud.Cover.daily.mean..sfc.^2) -1.634
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -1.916
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 2.363
## Pr(>|z|)
## (Intercept) 7.28e-06
## Temperature.daily.mean..2.m.above.gnd. 0.326936
## Relative.Humidity.daily.mean..2.m.above.gnd. 0.062498
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.041743
## Low.Cloud.Cover.daily.mean..low.cld.lay. 0.048326
## Wind.Speed.daily.mean..10.m.above.gnd. 0.013052
## Wind.Direction.daily.mean..900.mb. 6.01e-05
## Temperature.daily.min..2.m.above.gnd. 7.56e-05
## Mean.Sea.Level.Pressure.daily.min..MSL. 0.119421
## Total.Cloud.Cover.daily.min..sfc. 0.005538
## High.Cloud.Cover.daily.min..high.cld.lay. 0.103376
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000828
## Low.Cloud.Cover.daily.min..low.cld.lay. 0.026586
## Wind.Speed.daily.min..10.m.above.gnd. 0.022187
## Wind.Speed.daily.min..900.mb. 0.062132
## Mois02 0.000231
## Mois03 0.009900
## Mois04 0.005305
## Mois05 0.130075
## Mois06 0.261941
## Mois07 0.066750
## Mois08 0.006359
## Mois09 0.014210
## Mois10 0.022311
## Mois11 0.010866
## Mois12 0.248160
## nebulositemoyDegage ou partiellement couvert 0.095728
## directventmoySE-NO 0.120767
## I(Temperature.daily.mean..2.m.above.gnd.^2) 5.99e-05
## I(Wind.Speed.daily.mean..900.mb.^2) 0.044060
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 0.102335
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 0.055383
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.018136
##
## (Intercept) ***
## Temperature.daily.mean..2.m.above.gnd.
## Relative.Humidity.daily.mean..2.m.above.gnd. .
## High.Cloud.Cover.daily.mean..high.cld.lay. *
## Low.Cloud.Cover.daily.mean..low.cld.lay. *
## Wind.Speed.daily.mean..10.m.above.gnd. *
## Wind.Direction.daily.mean..900.mb. ***
## Temperature.daily.min..2.m.above.gnd. ***
## Mean.Sea.Level.Pressure.daily.min..MSL.
## Total.Cloud.Cover.daily.min..sfc. **
## High.Cloud.Cover.daily.min..high.cld.lay.
## Medium.Cloud.Cover.daily.max..mid.cld.lay. ***
## Low.Cloud.Cover.daily.min..low.cld.lay. *
## Wind.Speed.daily.min..10.m.above.gnd. *
## Wind.Speed.daily.min..900.mb. .
## Mois02 ***
## Mois03 **
## Mois04 **
## Mois05
## Mois06
## Mois07 .
## Mois08 **
## Mois09 *
## Mois10 *
## Mois11 *
## Mois12
## nebulositemoyDegage ou partiellement couvert .
## directventmoySE-NO
## I(Temperature.daily.mean..2.m.above.gnd.^2) ***
## I(Wind.Speed.daily.mean..900.mb.^2) *
## I(Total.Cloud.Cover.daily.mean..sfc.^2)
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) .
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 866.85 on 799 degrees of freedom
## AIC: 932.85
##
## Number of Fisher Scoring iterations: 5
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 7")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 7"
# Deviance check of model 7 against the saturated model, with the residual
# deviance and its degrees of freedom read from the fitted object.
# NOTE(review): the previous call used hard-coded values, pchisq(898.08, 853),
# which returned 0.1381116 and led to the remark "the overall significance of
# the model improves slightly, as does the AIC". Those inputs do not match the
# summary above (residual deviance 866.85 on 799 df), so re-check the remark
# about the test; the AIC reported in the summary is 932.85.
pchisq(deviance(model7), df.residual(model7), lower.tail = FALSE)
###############################################################################################################
# Modele 8 #
###############################################################################################################
# To limit possible collinearity between variables, the daily range
# (maximum minus minimum) is computed for several variables and stored in
# train2 as new columns.
train2 <- mutate(
  train2,
  amplitud_temp.2m.gnd = Temperature.daily.max..2.m.above.gnd. -
    Temperature.daily.min..2.m.above.gnd.,
  amplitud_humid.2m.gnd = Relative.Humidity.daily.max..2.m.above.gnd. -
    Relative.Humidity.daily.min..2.m.above.gnd.,
  amplitud_vitesse_vent.10.m = Wind.Speed.daily.max..10.m.above.gnd. -
    Wind.Speed.daily.min..10.m.above.gnd.,
  amplitud_pression = Mean.Sea.Level.Pressure.daily.max..MSL. -
    Mean.Sea.Level.Pressure.daily.min..MSL.,
  amplitud_nebulosite = Total.Cloud.Cover.daily.max..sfc. -
    Total.Cloud.Cover.daily.min..sfc.,
  amplitud_rafale = Wind.Gust.daily.max..sfc. -
    Wind.Gust.daily.min..sfc.
)
# Model 8: backward stepwise selection (step) starting from a binomial logit
# GLM of pluie.demain fitted on the train2[d, ] rows. The starting formula
# takes every column of train2 (`.`), removes the terms listed with `-`, then
# adds four squared terms... (five in total) and two interactions with `+`.
model8 <- step(
  object = glm(
    formula = pluie.demain ~ . -
      Date -
      Total.Cloud.Cover.daily.mean..sfc. -
      Mean.Sea.Level.Pressure.daily.mean..MSL. -
      Total.Precipitation.daily.sum..sfc. -
      Sunshine.Duration.daily.sum..sfc. -
      Shortwave.Radiation.daily.sum..sfc. -
      Wind.Direction.daily.mean..10.m.above.gnd. -
      Wind.Direction.daily.mean..80.m.above.gnd. -
      Wind.Direction.daily.mean..900.mb. -
      # Raw daily max/min columns (amplitud_* columns exist for some of them).
      Temperature.daily.max..2.m.above.gnd. -
      Temperature.daily.min..2.m.above.gnd. -
      Relative.Humidity.daily.max..2.m.above.gnd. -
      Relative.Humidity.daily.min..2.m.above.gnd. -
      Wind.Speed.daily.max..10.m.above.gnd. -
      Wind.Speed.daily.min..10.m.above.gnd. -
      Wind.Speed.daily.max..80.m.above.gnd. -
      Wind.Speed.daily.min..80.m.above.gnd. -
      Wind.Speed.daily.max..900.mb. -
      Wind.Speed.daily.min..900.mb. -
      Wind.Speed.daily.mean..80.m.above.gnd. -
      Mean.Sea.Level.Pressure.daily.max..MSL. -
      Mean.Sea.Level.Pressure.daily.min..MSL. -
      Total.Cloud.Cover.daily.max..sfc. -
      Total.Cloud.Cover.daily.min..sfc. -
      High.Cloud.Cover.daily.max..high.cld.lay. -
      High.Cloud.Cover.daily.min..high.cld.lay. -
      High.Cloud.Cover.daily.mean..high.cld.lay. -
      Medium.Cloud.Cover.daily.max..mid.cld.lay. -
      Medium.Cloud.Cover.daily.min..mid.cld.lay. -
      Medium.Cloud.Cover.daily.mean..mid.cld.lay. -
      Low.Cloud.Cover.daily.max..low.cld.lay. -
      Low.Cloud.Cover.daily.min..low.cld.lay. -
      Low.Cloud.Cover.daily.mean..low.cld.lay. -
      Wind.Gust.daily.max..sfc. -
      Wind.Gust.daily.min..sfc. +
      # Squared terms.
      I(Temperature.daily.mean..2.m.above.gnd.^2) +
      I(Wind.Speed.daily.mean..900.mb.^2) +
      I(Total.Cloud.Cover.daily.mean..sfc.^2) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
      I(Total.Precipitation.daily.sum..sfc.^2) +
      # Interactions.
      Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. +
      Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.,
    family = binomial(link = "logit"),
    data = train2[d, ]
  ),
  direction = "backward",
  trace = FALSE
)
# Coefficient table and deviances of the selected model.
summary(model8)
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. +
## Relative.Humidity.daily.mean..2.m.above.gnd. + Wind.Speed.daily.mean..10.m.above.gnd. +
## Wind.Speed.daily.mean..900.mb. + Mois + precipmoy + directventmoy +
## amplitud_temp.2m.gnd + amplitud_pression + I(Temperature.daily.mean..2.m.above.gnd.^2) +
## I(Total.Cloud.Cover.daily.mean..sfc.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. +
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.,
## family = binomial(link = "logit"), data = train2[d, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2823 -0.8942 -0.3373 0.8928 2.5469
##
## Coefficients:
## Estimate
## (Intercept) 5.768e+01
## Temperature.daily.mean..2.m.above.gnd. -4.880e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. -5.161e-02
## Wind.Speed.daily.mean..10.m.above.gnd. -5.719e-02
## Wind.Speed.daily.mean..900.mb. -4.148e-02
## Mois02 -1.683e+00
## Mois03 -1.437e+00
## Mois04 -1.521e+00
## Mois05 -8.849e-01
## Mois06 -6.601e-01
## Mois07 -1.280e+00
## Mois08 -1.779e+00
## Mois09 -1.376e+00
## Mois10 -1.192e+00
## Mois11 -9.747e-01
## Mois12 -5.874e-01
## precipmoyPrecip. moy. ou forte 1.844e-01
## precipmoyPrecipit. nulle -3.929e-01
## directventmoySE-NO 4.054e-01
## amplitud_temp.2m.gnd 1.814e-01
## amplitud_pression 4.259e-02
## I(Temperature.daily.mean..2.m.above.gnd.^2) 8.650e-03
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 1.590e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -5.209e-05
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. 2.361e-04
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 3.843e-03
## Std. Error
## (Intercept) 7.631e+00
## Temperature.daily.mean..2.m.above.gnd. 1.257e-01
## Relative.Humidity.daily.mean..2.m.above.gnd. 2.219e-02
## Wind.Speed.daily.mean..10.m.above.gnd. 2.753e-02
## Wind.Speed.daily.mean..900.mb. 1.624e-02
## Mois02 4.652e-01
## Mois03 4.963e-01
## Mois04 5.465e-01
## Mois05 5.581e-01
## Mois06 5.943e-01
## Mois07 6.220e-01
## Mois08 6.185e-01
## Mois09 5.809e-01
## Mois10 4.885e-01
## Mois11 4.469e-01
## Mois12 4.333e-01
## precipmoyPrecip. moy. ou forte 3.206e-01
## precipmoyPrecipit. nulle 3.267e-01
## directventmoySE-NO 2.424e-01
## amplitud_temp.2m.gnd 4.199e-02
## amplitud_pression 2.694e-02
## I(Temperature.daily.mean..2.m.above.gnd.^2) 1.998e-03
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 4.039e-05
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 6.930e-06
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. 6.073e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 1.413e-03
## z value
## (Intercept) 7.558
## Temperature.daily.mean..2.m.above.gnd. -3.882
## Relative.Humidity.daily.mean..2.m.above.gnd. -2.325
## Wind.Speed.daily.mean..10.m.above.gnd. -2.078
## Wind.Speed.daily.mean..900.mb. -2.555
## Mois02 -3.617
## Mois03 -2.895
## Mois04 -2.783
## Mois05 -1.586
## Mois06 -1.111
## Mois07 -2.057
## Mois08 -2.876
## Mois09 -2.369
## Mois10 -2.440
## Mois11 -2.181
## Mois12 -1.356
## precipmoyPrecip. moy. ou forte 0.575
## precipmoyPrecipit. nulle -1.203
## directventmoySE-NO 1.673
## amplitud_temp.2m.gnd 4.321
## amplitud_pression 1.581
## I(Temperature.daily.mean..2.m.above.gnd.^2) 4.330
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 3.938
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) -7.516
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. 3.888
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 2.719
## Pr(>|z|)
## (Intercept) 4.09e-14
## Temperature.daily.mean..2.m.above.gnd. 0.000104
## Relative.Humidity.daily.mean..2.m.above.gnd. 0.020047
## Wind.Speed.daily.mean..10.m.above.gnd. 0.037724
## Wind.Speed.daily.mean..900.mb. 0.010628
## Mois02 0.000297
## Mois03 0.003795
## Mois04 0.005385
## Mois05 0.112842
## Mois06 0.266725
## Mois07 0.039647
## Mois08 0.004023
## Mois09 0.017825
## Mois10 0.014694
## Mois11 0.029178
## Mois12 0.175159
## precipmoyPrecip. moy. ou forte 0.565285
## precipmoyPrecipit. nulle 0.229072
## directventmoySE-NO 0.094394
## amplitud_temp.2m.gnd 1.55e-05
## amplitud_pression 0.113848
## I(Temperature.daily.mean..2.m.above.gnd.^2) 1.49e-05
## I(Total.Cloud.Cover.daily.mean..sfc.^2) 8.23e-05
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) 5.64e-14
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. 0.000101
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.006544
##
## (Intercept) ***
## Temperature.daily.mean..2.m.above.gnd. ***
## Relative.Humidity.daily.mean..2.m.above.gnd. *
## Wind.Speed.daily.mean..10.m.above.gnd. *
## Wind.Speed.daily.mean..900.mb. *
## Mois02 ***
## Mois03 **
## Mois04 **
## Mois05
## Mois06
## Mois07 *
## Mois08 **
## Mois09 *
## Mois10 *
## Mois11 *
## Mois12
## precipmoyPrecip. moy. ou forte
## precipmoyPrecipit. nulle
## directventmoySE-NO .
## amplitud_temp.2m.gnd ***
## amplitud_pression
## I(Temperature.daily.mean..2.m.above.gnd.^2) ***
## I(Total.Cloud.Cover.daily.mean..sfc.^2) ***
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) ***
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. ***
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1152.01 on 831 degrees of freedom
## Residual deviance: 902.57 on 806 degrees of freedom
## AIC: 954.57
##
## Number of Fisher Scoring iterations: 4
# Goodness-of-fit test of model 8 against the saturated model. The label now
# names model 8 (it was copied from the model 7 section). The statistic and
# its degrees of freedom are read from the fitted object; the literals
# previously typed here (925.23 on 858 df) disagree with summary(model8),
# which reports a residual deviance of 902.57 on 806 df.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 8")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 8"
# Upper-tail probability; `lower.tail` is spelled out instead of relying on
# partial matching of `lower`.
pchisq(deviance(model8), df.residual(model8), lower.tail = FALSE)
# Le modèle est juste au-dessus du seuil de significativité globale à 5 %. Mais il présente l'avantage d'avoir moins, ou peu, de prédicteurs corrélés et d'être en outre plus parcimonieux.
# Au regard des AUC, relativement proches entre modèles, le modèle 7 présente les meilleurs résultats.
###############################################################################################################
# Courbes de ROC des modèles 2, 5, 7 et 8 #
###############################################################################################################
#Courbe de ROC des 4 modèles : 2, 5, 7 et 8
# Fitted probabilities of the four candidate models on the train2[d, ] rows
# (the same subset model8 is fitted on).
predM2 <- predict(model2, newdata = train2[d, ], type = "response")
predM5 <- predict(model5, newdata = train2[d, ], type = "response")
predM7 <- predict(model7, newdata = train2[d, ], type = "response")
predM8 <- predict(model8, newdata = train2[d, ], type = "response")
# Square plotting region for the ROC curves drawn below.
par(pty = "s")
# Model 2
roc(
  train2$pluie.demain[d], predM2,
  plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
  xlab = "% faux positifs", ylab = "% vrais positifs"
)
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM2, percent = T, plot = T, print.auc = T, legacy.axes = T, xlab = "% faux positifs", ylab = "% vrais positifs")
##
## Data: predM2 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 80.65%
# Model 5: ROC curve and AUC (in percent) on the train2[d, ] rows.
roc(
  train2$pluie.demain[d], predM5,
  plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
  xlab = "% faux positifs", ylab = "% vrais positifs"
)
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM5, percent = T, plot = T, print.auc = T, legacy.axes = T, xlab = "% faux positifs", ylab = "% vrais positifs")
##
## Data: predM5 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 81.9%
# Model 7: ROC curve and AUC (in percent) on the train2[d, ] rows.
roc(
  train2$pluie.demain[d], predM7,
  plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
  xlab = "% faux positifs", ylab = "% vrais positifs"
)
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM7, percent = T, plot = T, print.auc = T, legacy.axes = T, xlab = "% faux positifs", ylab = "% vrais positifs")
##
## Data: predM7 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 81.66%
# Model 8: ROC curve and AUC (in percent) on the train2[d, ] rows.
roc(
  train2$pluie.demain[d], predM8,
  plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
  xlab = "% faux positifs", ylab = "% vrais positifs"
)
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM8, percent = T, plot = T, print.auc = T, legacy.axes = T, xlab = "% faux positifs", ylab = "% vrais positifs")
##
## Data: predM8 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 79.91%
```
#Au seuil de 55%, l'erreur moyenne est minimum (0,27)
# Classify with model 5 at the 0.55 cut-off and compare the predicted classes
# with the observed outcome on the train2[d, ] rows.
preditM5 <- predM5 >= 0.55
print("Matrice de confusion du modèle 5 au seuil de 55 %")
## [1] "Matrice de confusion du modèle 5 au seuil de 55 %"
table("PREDITS" = preditM5, "OBSERVES" = train2$pluie.demain[d])
## OBSERVES
## PREDITS FALSE TRUE
## FALSE 345 128
## TRUE 88 271
# Share of rows whose predicted class differs from the observed one.
mean(preditM5 != train2[d, "pluie.demain"], na.rm = TRUE)
## [1] 0.2596154
#On obtient 74,8% de bonnes prédictions de jours de pluie
###############################################################################################################
# Validation croisee pour le modele 7 #
###############################################################################################################
# Cut-off selection for model 7 on the train2[d, ] rows.
# For every candidate cut-off on a 1e-5 grid over [0, 1], compute the share of
# rows whose predicted class (predM7 >= cut-off) differs from the observed
# outcome, then keep the first cut-off that minimises that share.
seuil <- seq(0, 1, by = .00001)
obs_M7 <- train2$pluie.demain[d]
res <- vapply(
  seuil,
  function(s) mean((predM7 >= s) != obs_M7, na.rm = TRUE),
  numeric(1)
)
seuil_opt_M7 <- seuil[which.min(res)]
seuil_opt_M7
## [1] 0.57366
# The confusion matrix is built at the cut-off found above. The value that was
# hard-coded here before (0.47) is not the minimiser reported by the search in
# the recorded output (0.57366), so the label is now derived from the search.
preditM7 <- predM7 >= seuil_opt_M7
print(sprintf("Matrice de confusion du modèle 7 au seuil de %.1f %%",
              100 * seuil_opt_M7))
table("PREDITS" = preditM7, "OBSERVES" = obs_M7)
# Share of rows whose predicted class differs from the observed one.
mean(preditM7 != obs_M7, na.rm = TRUE)
#On obtient 79% de bonnes prédictions de jours de pluie
###############################################################################################################
# Validation croisee pour le modele 8 #
###############################################################################################################
# Cut-off selection for model 8 on the train2[d, ] rows.
# For every candidate cut-off on a 1e-5 grid over [0, 1], compute the share of
# rows whose predicted class (predM8 >= cut-off) differs from the observed
# outcome, then keep the first cut-off that minimises that share.
seuil <- seq(0, 1, by = .00001)
obs_M8 <- train2$pluie.demain[d]
res <- vapply(
  seuil,
  function(s) mean((predM8 >= s) != obs_M8, na.rm = TRUE),
  numeric(1)
)
seuil_opt_M8 <- seuil[which.min(res)]
seuil_opt_M8
## [1] 0.60976
# The confusion matrix is built at the cut-off found above. The value that was
# hard-coded here before (0.45) is not the minimiser reported by the search in
# the recorded output (0.60976), so the label is now derived from the search.
preditM8 <- predM8 >= seuil_opt_M8
print(sprintf("Matrice de confusion du modèle 8 au seuil de %.1f %%",
              100 * seuil_opt_M8))
table("PREDITS" = preditM8, "OBSERVES" = obs_M8)
# Share of rows whose predicted class differs from the observed one.
mean(preditM8 != obs_M8, na.rm = TRUE)
#On obtient 81% de bonnes prédictions de jours de pluie. En revanche, le taux de faux positifs (jours de pluie prédits à tort) est sensiblement dégradé.
#Création des variables discrétisées dans le jeu de données TEST
# Discretised versions of seven numeric columns of `test`, appended in one
# mutate() call. Rows matching no condition (e.g. missing values) get NA.
test <- mutate(
  test,
  # Mean total cloud cover, split at 63.
  nebulositemoy = case_when(
    Total.Cloud.Cover.daily.mean..sfc. < 63 ~ "Degage ou partiellement couvert",
    Total.Cloud.Cover.daily.mean..sfc. >= 63 ~ "Couvert"
  ),
  # Mean sea-level pressure, split at 1017.
  pressionmoy = case_when(
    Mean.Sea.Level.Pressure.daily.mean..MSL. < 1017 ~ "Pression basse et moyenne",
    Mean.Sea.Level.Pressure.daily.mean..MSL. >= 1017 ~ "Haute pression"
  ),
  # Daily precipitation sum: zero, (0, 0.2], above 0.2.
  precipmoy = case_when(
    Total.Precipitation.daily.sum..sfc. == 0 ~ "Precipit. nulle",
    Total.Precipitation.daily.sum..sfc. > 0 &
      Total.Precipitation.daily.sum..sfc. <= 0.2 ~ "Precip. faible",
    Total.Precipitation.daily.sum..sfc. > 0.2 ~ "Precip. moy. ou forte"
  ),
  # Daily snowfall sum: zero versus positive.
  neigemoy = case_when(
    Snowfall.amount.raw.daily.sum..sfc. == 0 ~ "Pas de neige",
    Snowfall.amount.raw.daily.sum..sfc. > 0 ~ "Chute de neige"
  ),
  # Daily sunshine duration: zero, (0, 328], above 328.
  soleilmoy = case_when(
    Sunshine.Duration.daily.sum..sfc. == 0 ~ "Pas d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 0 &
      Sunshine.Duration.daily.sum..sfc. <= 328 ~ "Peu d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 328 ~ "Ensoleillem."
  ),
  # Daily shortwave radiation sum, split at 3182.
  rayonmoy = case_when(
    Shortwave.Radiation.daily.sum..sfc. < 3182 ~ "Faible rayonnement.",
    Shortwave.Radiation.daily.sum..sfc. >= 3182 ~ "Rayonnem. eleve"
  ),
  # Mean wind direction at 10 m, split at 138.
  directventmoy = case_when(
    Wind.Direction.daily.mean..10.m.above.gnd. < 138 ~ "N-SE",
    Wind.Direction.daily.mean..10.m.above.gnd. >= 138 ~ "SE-NO"
  )
)
# Prediction on the test set with model 7: append the estimated probability
# and the resulting class to `test`, then export everything to resultat.csv.
resultat <- cbind(
  test,
  Proba_estimee = predict(model7, newdata = test, type = "response")
)
# Same decision rule as the model 7 confusion matrix (`>=`, where this line
# previously used `>`); the comparison already yields TRUE/FALSE, so no
# ifelse() wrapper is needed.
# NOTE(review): 0.47 is hard-coded, while the threshold search recorded for
# model 7 earlier in this file printed 0.57366 -- confirm the intended cut-off.
resultat$pluie.lendemain <- resultat$Proba_estimee >= 0.47
write.csv(resultat, file = "resultat.csv", row.names = FALSE)